{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "from tqdm import tqdm \n",
    "\n",
    "from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score\n",
    "\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.neural_network import MLPClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "import pickle\n",
    "\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0.24.1'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sklearn\n",
    "sklearn.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "random.seed(10661569)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the model-development data dictionary. pickle can execute arbitrary code\n",
    "# while loading, so this file must come from a trusted source.\n",
    "with open(\"Data/ModelDevelopmentData.pkl\", \"rb\") as file:\n",
    "    model_developmnet_data = pickle.load(file)\n",
    "\n",
    "# One shared pair of feature sets, plus one label set per target (AE and PC).\n",
    "xtrain, xtest = model_developmnet_data[\"xtrain\"], model_developmnet_data[\"xtest\"]\n",
    "ytrain_ae, ytrain_pc = model_developmnet_data[\"ytrain_ae\"], model_developmnet_data[\"ytrain_pc\"]\n",
    "ytest_ae, ytest_pc = model_developmnet_data[\"ytest_ae\"], model_developmnet_data[\"ytest_pc\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train size 17271\n",
      "Train AE 497\n",
      "Train PC 1094\n",
      "\n",
      "Test size 287\n",
      "Test AE 130\n",
      "Test PC 150\n"
     ]
    }
   ],
   "source": [
    "# Row counts per split, plus the sum of each label set (the number of positives\n",
    "# if labels are 0/1 -- TODO confirm).\n",
    "for _caption, _count in (\n",
    "    (\"Train size\", len(xtrain)),\n",
    "    (\"Train AE\", sum(ytrain_ae)),\n",
    "    (\"Train PC\", sum(ytrain_pc)),\n",
    "):\n",
    "    print(_caption, _count)\n",
    "print()\n",
    "for _caption, _count in (\n",
    "    (\"Test size\", len(xtest)),\n",
    "    (\"Test AE\", sum(ytest_ae)),\n",
    "    (\"Test PC\", sum(ytest_pc)),\n",
    "):\n",
    "    print(_caption, _count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PC\n",
      "Accuracy 0.662020905923345\n",
      "F1 0.5358851674641149\n",
      "Precision 0.9491525423728814\n",
      "Recall 0.37333333333333335\n",
      "\n",
      "[[134   3]\n",
      " [ 94  56]]\n",
      "AE\n",
      "Accuracy 0.6341463414634146\n",
      "F1 0.33121019108280253\n",
      "Precision 0.9629629629629629\n",
      "Recall 0.2\n",
      "\n",
      "[[156   1]\n",
      " [104  26]]\n"
     ]
    }
   ],
   "source": [
    "\"\"\"M1: LOGREG\"\"\"\n",
    "# Logistic regression (liblinear solver, fixed random_state) for the PC target.\n",
    "model1_pc = LogisticRegression(solver=\"liblinear\", random_state=889988).fit(xtrain, ytrain_pc)\n",
    "preds1_pc = model1_pc.predict(xtest)\n",
    "\n",
    "# Score the test-set predictions; the scorers run in the order the names are unpacked.\n",
    "acc_1pc, f11_pc, prec_1pc, recall_1pc = (\n",
    "    scorer(ytest_pc, preds1_pc)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf1_pc = confusion_matrix(ytest_pc, preds1_pc)\n",
    "\n",
    "print(\"PC\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_1pc),\n",
    "    (\"F1\", f11_pc),\n",
    "    (\"Precision\", prec_1pc),\n",
    "    (\"Recall\", recall_1pc),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf1_pc)\n",
    "\n",
    "\n",
    "# Same estimator settings, fitted on the AE labels.\n",
    "model1_ae = LogisticRegression(solver=\"liblinear\", random_state=889988).fit(xtrain, ytrain_ae)\n",
    "preds1_ae = model1_ae.predict(xtest)\n",
    "\n",
    "acc_1ae, f11_ae, prec_1ae, recall_1ae = (\n",
    "    scorer(ytest_ae, preds1_ae)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf1_ae = confusion_matrix(ytest_ae, preds1_ae)\n",
    "\n",
    "print(\"AE\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_1ae),\n",
    "    (\"F1\", f11_ae),\n",
    "    (\"Precision\", prec_1ae),\n",
    "    (\"Recall\", recall_1ae),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf1_ae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize both M1 estimators to disk with pickle.\n",
    "for _path, _estimator in (\n",
    "    (\"Models/M1_pc.pkl\", model1_pc),\n",
    "    (\"Models/M1_ae.pkl\", model1_ae),\n",
    "):\n",
    "    with open(_path, \"wb\") as file:\n",
    "        pickle.dump(_estimator, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PC\n",
      "Accuracy 0.5993031358885017\n",
      "F1 0.7160493827160493\n",
      "Precision 0.5686274509803921\n",
      "Recall 0.9666666666666667\n",
      "\n",
      "[[ 27 110]\n",
      " [  5 145]]\n",
      "AE\n",
      "Accuracy 0.6341463414634146\n",
      "F1 0.7025495750708216\n",
      "Precision 0.5560538116591929\n",
      "Recall 0.9538461538461539\n",
      "\n",
      "[[ 58  99]\n",
      " [  6 124]]\n"
     ]
    }
   ],
   "source": [
    "\"\"\"M2: Naive Bayes\"\"\"\n",
    "\n",
    "# Gaussian naive Bayes with default settings for the PC target.\n",
    "model2_pc = GaussianNB().fit(xtrain, ytrain_pc)\n",
    "preds2_pc = model2_pc.predict(xtest)\n",
    "\n",
    "# Score the test-set predictions; the scorers run in the order the names are unpacked.\n",
    "acc_2pc, f12_pc, prec_2pc, recall_2pc = (\n",
    "    scorer(ytest_pc, preds2_pc)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf2_pc = confusion_matrix(ytest_pc, preds2_pc)\n",
    "\n",
    "print(\"PC\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_2pc),\n",
    "    (\"F1\", f12_pc),\n",
    "    (\"Precision\", prec_2pc),\n",
    "    (\"Recall\", recall_2pc),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf2_pc)\n",
    "\n",
    "\n",
    "# Same estimator, fitted on the AE labels.\n",
    "model2_ae = GaussianNB().fit(xtrain, ytrain_ae)\n",
    "preds2_ae = model2_ae.predict(xtest)\n",
    "\n",
    "acc_2ae, f12_ae, prec_2ae, recall_2ae = (\n",
    "    scorer(ytest_ae, preds2_ae)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf2_ae = confusion_matrix(ytest_ae, preds2_ae)\n",
    "\n",
    "print(\"AE\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_2ae),\n",
    "    (\"F1\", f12_ae),\n",
    "    (\"Precision\", prec_2ae),\n",
    "    (\"Recall\", recall_2ae),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf2_ae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize both M2 estimators to disk with pickle.\n",
    "for _path, _estimator in (\n",
    "    (\"Models/M2_pc.pkl\", model2_pc),\n",
    "    (\"Models/M2_ae.pkl\", model2_ae),\n",
    "):\n",
    "    with open(_path, \"wb\") as file:\n",
    "        pickle.dump(_estimator, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PC\n",
      "Accuracy 0.7317073170731707\n",
      "F1 0.717948717948718\n",
      "Precision 0.7967479674796748\n",
      "Recall 0.6533333333333333\n",
      "\n",
      "[[112  25]\n",
      " [ 52  98]]\n",
      "AE\n",
      "Accuracy 0.7770034843205574\n",
      "F1 0.7037037037037038\n",
      "Precision 0.8837209302325582\n",
      "Recall 0.5846153846153846\n",
      "\n",
      "[[147  10]\n",
      " [ 54  76]]\n"
     ]
    }
   ],
   "source": [
    "\"\"\"M3: SVM\"\"\"\n",
    "# RBF-kernel SVM for the PC target; class 1 carries a weight of 1000 against 1 for class 0.\n",
    "# Per-kernel scores noted by the original author (metric not stated):\n",
    "# rbf : .59 ; poly :.053 ; sigmoid : 0.58 ; linear : 0.43\n",
    "model3_pc = SVC(\n",
    "    C=50,\n",
    "    class_weight={0: 1, 1: 1000},\n",
    "    kernel=\"rbf\",\n",
    "    decision_function_shape=\"ovo\",\n",
    ").fit(xtrain, ytrain_pc)\n",
    "preds3_pc = model3_pc.predict(xtest)\n",
    "\n",
    "# Score the test-set predictions; the scorers run in the order the names are unpacked.\n",
    "acc_3pc, f13_pc, prec_3pc, recall_3pc = (\n",
    "    scorer(ytest_pc, preds3_pc)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf3_pc = confusion_matrix(ytest_pc, preds3_pc)\n",
    "\n",
    "print(\"PC\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_3pc),\n",
    "    (\"F1\", f13_pc),\n",
    "    (\"Precision\", prec_3pc),\n",
    "    (\"Recall\", recall_3pc),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf3_pc)\n",
    "\n",
    "\n",
    "# Same SVM settings, fitted on the AE labels (the author's kernel notes are\n",
    "# identical: rbf : .59 ; poly :.053 ; sigmoid : 0.58 ; linear : 0.43).\n",
    "model3_ae = SVC(\n",
    "    C=50,\n",
    "    class_weight={0: 1, 1: 1000},\n",
    "    kernel=\"rbf\",\n",
    "    decision_function_shape=\"ovo\",\n",
    ").fit(xtrain, ytrain_ae)\n",
    "preds3_ae = model3_ae.predict(xtest)\n",
    "\n",
    "acc_3ae, f13_ae, prec_3ae, recall_3ae = (\n",
    "    scorer(ytest_ae, preds3_ae)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf3_ae = confusion_matrix(ytest_ae, preds3_ae)\n",
    "\n",
    "print(\"AE\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_3ae),\n",
    "    (\"F1\", f13_ae),\n",
    "    (\"Precision\", prec_3ae),\n",
    "    (\"Recall\", recall_3ae),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf3_ae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize both M3 estimators to disk with pickle.\n",
    "for _path, _estimator in (\n",
    "    (\"Models/M3_pc.pkl\", model3_pc),\n",
    "    (\"Models/M3_ae.pkl\", model3_ae),\n",
    "):\n",
    "    with open(_path, \"wb\") as file:\n",
    "        pickle.dump(_estimator, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PC\n",
      "Accuracy 0.6794425087108014\n",
      "F1 0.603448275862069\n",
      "Precision 0.8536585365853658\n",
      "Recall 0.4666666666666667\n",
      "\n",
      "[[125  12]\n",
      " [ 80  70]]\n",
      "AE\n",
      "Accuracy 0.6759581881533101\n",
      "F1 0.46857142857142847\n",
      "Precision 0.9111111111111111\n",
      "Recall 0.3153846153846154\n",
      "\n",
      "[[153   4]\n",
      " [ 89  41]]\n"
     ]
    }
   ],
   "source": [
    "\"\"\"M4: MLP\"\"\"\n",
    "# Multi-layer perceptron with hidden layers of 100, 20 and 12 units and a fixed\n",
    "# random_state, fitted on the PC labels.\n",
    "model4_pc = MLPClassifier(hidden_layer_sizes=(100, 20, 12), random_state=1001).fit(xtrain, ytrain_pc)\n",
    "preds4_pc = model4_pc.predict(xtest)\n",
    "\n",
    "# Score the test-set predictions; the scorers run in the order the names are unpacked.\n",
    "acc_4pc, f14_pc, prec_4pc, recall_4pc = (\n",
    "    scorer(ytest_pc, preds4_pc)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf4_pc = confusion_matrix(ytest_pc, preds4_pc)\n",
    "\n",
    "print(\"PC\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_4pc),\n",
    "    (\"F1\", f14_pc),\n",
    "    (\"Precision\", prec_4pc),\n",
    "    (\"Recall\", recall_4pc),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf4_pc)\n",
    "\n",
    "\n",
    "# Same network settings, fitted on the AE labels.\n",
    "model4_ae = MLPClassifier(hidden_layer_sizes=(100, 20, 12), random_state=1001).fit(xtrain, ytrain_ae)\n",
    "preds4_ae = model4_ae.predict(xtest)\n",
    "\n",
    "acc_4ae, f14_ae, prec_4ae, recall_4ae = (\n",
    "    scorer(ytest_ae, preds4_ae)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf4_ae = confusion_matrix(ytest_ae, preds4_ae)\n",
    "\n",
    "print(\"AE\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_4ae),\n",
    "    (\"F1\", f14_ae),\n",
    "    (\"Precision\", prec_4ae),\n",
    "    (\"Recall\", recall_4ae),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf4_ae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize both M4 estimators to disk with pickle.\n",
    "for _path, _estimator in (\n",
    "    (\"Models/M4_pc.pkl\", model4_pc),\n",
    "    (\"Models/M4_ae.pkl\", model4_ae),\n",
    "):\n",
    "    with open(_path, \"wb\") as file:\n",
    "        pickle.dump(_estimator, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PC\n",
      "Accuracy 0.5888501742160279\n",
      "F1 0.7160493827160493\n",
      "Precision 0.7666666666666667\n",
      "Recall 0.30666666666666664\n",
      "\n",
      "[[123  14]\n",
      " [104  46]]\n",
      "AE\n",
      "Accuracy 0.5749128919860628\n",
      "F1 0.18666666666666665\n",
      "Precision 0.7\n",
      "Recall 0.1076923076923077\n",
      "\n",
      "[[151   6]\n",
      " [116  14]]\n"
     ]
    }
   ],
   "source": [
    "\"\"\"M5: KNN\"\"\"\n",
    "# k-nearest-neighbours classifier with default settings for the PC target.\n",
    "model5_pc = KNeighborsClassifier()\n",
    "\n",
    "model5_pc.fit(xtrain, ytrain_pc)\n",
    "preds5_pc = model5_pc.predict(xtest)\n",
    "\n",
    "acc_5pc = accuracy_score(ytest_pc, preds5_pc)\n",
    "f15_pc = f1_score(ytest_pc, preds5_pc)\n",
    "prec_5pc = precision_score(ytest_pc, preds5_pc)\n",
    "recall_5pc = recall_score(ytest_pc, preds5_pc)\n",
    "\n",
    "conf5_pc = confusion_matrix(ytest_pc, preds5_pc)\n",
    "\n",
    "print(\"PC\")\n",
    "print(\"Accuracy\", acc_5pc)\n",
    "# Report this model's own F1 (f15_pc); the cell previously printed f12_pc,\n",
    "# which is the Naive Bayes score from the M2 cell.\n",
    "print(\"F1\", f15_pc)\n",
    "print(\"Precision\", prec_5pc)\n",
    "print(\"Recall\", recall_5pc)\n",
    "print()\n",
    "print(conf5_pc)\n",
    "\n",
    "\n",
    "# Same classifier, fitted on the AE labels.\n",
    "model5_ae = KNeighborsClassifier()\n",
    "\n",
    "model5_ae.fit(xtrain, ytrain_ae)\n",
    "preds5_ae = model5_ae.predict(xtest)\n",
    "\n",
    "acc_5ae = accuracy_score(ytest_ae, preds5_ae)\n",
    "f15_ae = f1_score(ytest_ae, preds5_ae)\n",
    "prec_5ae = precision_score(ytest_ae, preds5_ae)\n",
    "recall_5ae = recall_score(ytest_ae, preds5_ae)\n",
    "\n",
    "conf5_ae = confusion_matrix(ytest_ae, preds5_ae)\n",
    "\n",
    "print(\"AE\")\n",
    "print(\"Accuracy\", acc_5ae)\n",
    "print(\"F1\", f15_ae)\n",
    "print(\"Precision\", prec_5ae)\n",
    "print(\"Recall\", recall_5ae)\n",
    "print()\n",
    "print(conf5_ae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize both M5 estimators to disk with pickle.\n",
    "for _path, _estimator in (\n",
    "    (\"Models/M5_pc.pkl\", model5_pc),\n",
    "    (\"Models/M5_ae.pkl\", model5_ae),\n",
    "):\n",
    "    with open(_path, \"wb\") as file:\n",
    "        pickle.dump(_estimator, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nCombined Models\\n'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"\"\"\n",
    "Combined Models\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AE\n",
      "Accuracy 0.8013937282229965\n",
      "F1 0.7532467532467533\n",
      "Precision 0.8613861386138614\n",
      "Recall 0.6692307692307692\n",
      "\n",
      "[[143  14]\n",
      " [ 43  87]]\n"
     ]
    }
   ],
   "source": [
    "# Vote-based ensemble for the AE target: emit 1 when the five models' predictions\n",
    "# for a sample sum to at least 2, otherwise 0 (predictions assumed to be 0/1).\n",
    "# NOTE(review): with five voters a strict majority needs 3 votes -- confirm that 2 is intended.\n",
    "comb_ae = [\n",
    "    1 if sum(votes) >= 2 else 0\n",
    "    for votes in zip(preds1_ae, preds2_ae, preds3_ae, preds4_ae, preds5_ae)\n",
    "]\n",
    "\n",
    "# Score the combined predictions; the scorers run in the order the names are unpacked.\n",
    "acc_comb_ae, f1_comb_ae, prec_comb_ae, recall_comb_ae = (\n",
    "    scorer(ytest_ae, comb_ae)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf_comb_ae = confusion_matrix(ytest_ae, comb_ae)\n",
    "\n",
    "print(\"AE\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_comb_ae),\n",
    "    (\"F1\", f1_comb_ae),\n",
    "    (\"Precision\", prec_comb_ae),\n",
    "    (\"Recall\", recall_comb_ae),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf_comb_ae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PC\n",
      "Accuracy 0.7491289198606271\n",
      "F1 0.76\n",
      "Precision 0.76\n",
      "Recall 0.76\n",
      "\n",
      "[[101  36]\n",
      " [ 36 114]]\n"
     ]
    }
   ],
   "source": [
    "# Vote-based ensemble for the PC target: emit 1 when the five models' predictions\n",
    "# for a sample sum to at least 2, otherwise 0 (predictions assumed to be 0/1).\n",
    "# NOTE(review): with five voters a strict majority needs 3 votes -- confirm that 2 is intended.\n",
    "comb_pc = [\n",
    "    1 if sum(votes) >= 2 else 0\n",
    "    for votes in zip(preds1_pc, preds2_pc, preds3_pc, preds4_pc, preds5_pc)\n",
    "]\n",
    "\n",
    "# Score the combined predictions; the scorers run in the order the names are unpacked.\n",
    "acc_comb_pc, f1_comb_pc, prec_comb_pc, recall_comb_pc = (\n",
    "    scorer(ytest_pc, comb_pc)\n",
    "    for scorer in (accuracy_score, f1_score, precision_score, recall_score)\n",
    ")\n",
    "conf_comb_pc = confusion_matrix(ytest_pc, comb_pc)\n",
    "\n",
    "print(\"PC\")\n",
    "for _caption, _score in (\n",
    "    (\"Accuracy\", acc_comb_pc),\n",
    "    (\"F1\", f1_comb_pc),\n",
    "    (\"Precision\", prec_comb_pc),\n",
    "    (\"Recall\", recall_comb_pc),\n",
    "):\n",
    "    print(_caption, _score)\n",
    "print()\n",
    "print(conf_comb_pc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
